library(tidyverse)
── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.6     ✔ purrr   0.3.4
✔ tibble  3.1.7     ✔ dplyr   1.0.9
✔ tidyr   1.2.0     ✔ stringr 1.4.0
✔ readr   2.1.2     ✔ forcats 0.5.1
── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(modelr)
library(caret)
Loading required package: lattice

Attaching package: ‘caret’

The following object is masked from ‘package:purrr’:

    lift
savings <- CodeClanData::savings
savings

Data = Pattern + Noise

model_overfit <- lm(savings ~ .,
                    data = savings)

summary(model_overfit)

Call:
lm(formula = savings ~ ., data = savings)

Residuals:
   Min     1Q Median     3Q    Max 
-32617  -6684   -264   6481  31884 

Coefficients: (1 not defined because of singularities)
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                 2.050e+04  2.591e+03   7.915 5.81e-15 ***
genderMale                 -1.049e+03  1.706e+03  -0.615   0.5386    
nameAngus                   1.531e+03  1.755e+03   0.873   0.3830    
nameBert                   -1.973e+02  1.726e+03  -0.114   0.9090    
nameBonnie                  1.261e+03  1.809e+03   0.697   0.4859    
nameCara                   -8.515e+02  1.807e+03  -0.471   0.6376    
nameCharles                 2.419e+03  1.786e+03   1.354   0.1759    
nameDonald                  1.253e+02  1.823e+03   0.069   0.9452    
nameDora                   -5.149e+02  1.995e+03  -0.258   0.7964    
nameEd                      1.249e+03  1.693e+03   0.738   0.4606    
nameEmmy                   -1.062e+03  1.816e+03  -0.585   0.5587    
nameFlorence                9.245e+02  1.833e+03   0.504   0.6142    
nameFreddy                  6.287e+02  1.792e+03   0.351   0.7257    
nameGilly                  -1.518e+03  1.764e+03  -0.861   0.3896    
nameGord                   -1.246e+02  1.756e+03  -0.071   0.9435    
nameHarry                   3.218e+02  1.742e+03   0.185   0.8535    
nameHelena                  4.456e+02  1.748e+03   0.255   0.7988    
nameIndia                   9.007e+02  1.785e+03   0.505   0.6139    
nameIvan                    9.941e+02  1.780e+03   0.559   0.5766    
nameJimmy                          NA         NA      NA       NA    
nameJools                   6.287e+02  1.772e+03   0.355   0.7228    
surnameFraser              -1.113e+03  1.673e+03  -0.665   0.5060    
surnameGaldie               2.034e+03  1.839e+03   1.106   0.2689    
surnameHalcrow             -7.546e+02  1.819e+03  -0.415   0.6783    
surnameHenderson           -3.788e+02  1.751e+03  -0.216   0.8288    
surnameIrvine               3.610e+03  1.713e+03   2.108   0.0353 *  
surnameJamieson             4.129e+02  1.831e+03   0.226   0.8216    
surnameJohnson              4.237e+02  1.826e+03   0.232   0.8165    
surnameLaurenceso          -3.532e+02  1.839e+03  -0.192   0.8478    
surnameLeask                2.301e+03  1.697e+03   1.356   0.1754    
surnameManson               1.409e+03  1.670e+03   0.843   0.3992    
surnameMowat                9.803e+02  1.699e+03   0.577   0.5641    
surnameNicolson            -1.051e+03  1.816e+03  -0.578   0.5631    
surnamePeterson             3.189e+03  1.706e+03   1.869   0.0618 .  
surnameRobertson            2.960e+03  1.650e+03   1.794   0.0731 .  
surnameSinclair             4.578e+02  1.790e+03   0.256   0.7982    
surnameSmith                6.355e+02  1.707e+03   0.372   0.7098    
surnameTait                 2.379e+03  1.721e+03   1.382   0.1673    
surnameThomason             2.579e+03  1.835e+03   1.405   0.1603    
surnameWilliamson           3.408e+03  1.780e+03   1.914   0.0558 .  
job_areaLegal               2.798e+02  9.491e+02   0.295   0.7682    
job_areaProduct Management  2.396e+02  9.720e+02   0.247   0.8053    
job_areaSales               7.254e+01  9.458e+02   0.077   0.9389    
job_areaTraining            6.860e+02  9.638e+02   0.712   0.4767    
salary                      3.312e-01  6.586e-02   5.028 5.75e-07 ***
age                         2.473e+02  2.492e+01   9.924  < 2e-16 ***
retiredYes                 -2.120e+04  1.091e+03 -19.425  < 2e-16 ***
locationEdinburgh          -3.907e+02  1.230e+03  -0.318   0.7509    
locationGlasgow             9.128e+01  1.200e+03   0.076   0.9394    
locationInverness           4.338e+02  1.194e+03   0.363   0.7165    
locationOrkney             -1.550e+03  1.223e+03  -1.267   0.2053    
locationShetland           -6.597e+02  1.201e+03  -0.549   0.5829    
locationStirling            1.018e+02  1.250e+03   0.081   0.9351    
locationWestern Isles      -1.211e+03  1.214e+03  -0.998   0.3186    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 9939 on 1147 degrees of freedom
Multiple R-squared:  0.3345,    Adjusted R-squared:  0.3044 
F-statistic: 11.09 on 52 and 1147 DF,  p-value: < 2.2e-16
plot(model_overfit)

model_wellfit <- lm(savings ~ salary + age + retired,
                    data = savings)

summary(model_wellfit)

Call:
lm(formula = savings ~ salary + age + retired, data = savings)

Residuals:
     Min       1Q   Median       3Q      Max 
-31330.5  -6764.2    147.7   6625.2  30518.5 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  2.125e+04  1.659e+03  12.812  < 2e-16 ***
salary       3.376e-01  6.443e-02   5.239  1.9e-07 ***
age          2.463e+02  2.432e+01  10.127  < 2e-16 ***
retiredYes  -2.102e+04  1.066e+03 -19.710  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 9918 on 1196 degrees of freedom
Multiple R-squared:  0.309, Adjusted R-squared:  0.3073 
F-statistic: 178.3 on 3 and 1196 DF,  p-value: < 2.2e-16
plot(model_wellfit)

model_underfit <- lm(savings ~ salary,
                     data = savings)

summary(model_underfit)

Call:
lm(formula = savings ~ salary, data = savings)

Residuals:
   Min     1Q Median     3Q    Max 
-36246  -7847     62   8127  37386 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 2.701e+04  1.547e+03  17.459  < 2e-16 ***
salary      3.466e-01  7.679e-02   4.514 6.99e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 11820 on 1198 degrees of freedom
Multiple R-squared:  0.01672,   Adjusted R-squared:  0.0159 
F-statistic: 20.38 on 1 and 1198 DF,  p-value: 6.992e-06
plot(model_underfit)

Parsimony

  • the overfit model isn’t simple
  • well-fit model is simple
  • underfit model is too simple

Goodness of Fit (GoF)

  • \(r^2\)
    • larger is better
  • adjusted \(r^2\)
    • like \(r^2\), but penalises extra predictors
  • AIC
    • smaller is better
    • Akaike Information Criterion (based on likelihood)
    • not really a GoF measure
    • more a relative GoF measure
    • used for comparing models (formulas below)
  • BIC
    • smaller is better
    • Bayesian Information Criterion (based on likelihood)
    • not really a GoF measure
    • more a relative GoF measure
    • used for comparing models
    • penalises more strongly than AIC
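
For reference, \(\text{AIC} = 2k - 2\ln(\hat{L})\) and \(\text{BIC} = k\ln(n) - 2\ln(\hat{L})\), where \(k\) is the number of estimated parameters, \(n\) the number of observations, and \(\hat{L}\) the maximised likelihood. Since \(\ln(n) > 2\) whenever \(n > 7\), BIC penalises each extra parameter more heavily than AIC.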

pull out the adjusted \(r^2\) from the models

summary(model_overfit)$adj.r.squared
[1] 0.304378
summary(model_wellfit)$adj.r.squared
[1] 0.3072727
summary(model_underfit)$adj.r.squared
[1] 0.01590247
AIC(model_overfit)
[1] 25549.42
AIC(model_wellfit)
[1] 25496.62
AIC(model_underfit)
[1] 25915.93
BIC(model_overfit)
[1] 25824.29
BIC(model_wellfit)
[1] 25522.07
BIC(model_underfit)
[1] 25931.2

The well-fit model comes out on top on all three measures: the highest adjusted \(r^2\) and the lowest AIC and BIC.

put model into tidy data

broom::glance(model_overfit)
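
broom::glance() returns these measures as a one-row tibble, so the comparison above can be scripted in one pipeline. A minimal sketch, assuming the three models fitted earlier:

# one row of fit measures per model, bound into a comparison table
list(overfit = model_overfit,
     wellfit = model_wellfit,
     underfit = model_underfit) %>% 
  map_df(broom::glance, .id = "model") %>% 
  select(model, adj.r.squared, AIC, BIC)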

Test / train sets

    • Split data before looking at it
    • Basic cleaning can be done
    • Don’t explore patterns or relationships
    • Create a test set
    • Use as much data as possible for training set

Get our test and train sets

set.seed(9)

n_data <- nrow(savings)

test_index <- sample(1:n_data, size = n_data * 0.2)

test <- slice(savings, test_index)
train <- slice(savings, -test_index)

Fit a model to the TRAINING set

library(ggfortify) # provides the autoplot() method for lm diagnostics

model <- lm(savings ~ salary + age + retired,
            data = train)

autoplot(model)

predictions_test <- test %>% 
  add_predictions(model) %>% 
  select(savings, pred)

predictions_test

calculate the mean squared error
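
For \(n\) rows, \(\text{MSE} = \frac{1}{n}\sum_{i=1}^{n}(\text{pred}_i - \text{savings}_i)^2\), and \(\text{RMSE} = \sqrt{\text{MSE}}\) puts the error back on the original scale of savings.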

predictions_test <- predictions_test %>% 
  mutate(sq_err = (pred - savings)^2)

mse_test <- mean(predictions_test$sq_err)
mse_test # normally this would be sqrt'd -> RMSE
[1] 106665166
sqrt(mse_test)
[1] 10327.88
predictions_train <- train %>% 
  add_predictions(model) %>% 
  select(savings, pred)

predictions_train
predictions_train <- predictions_train %>% 
  mutate(sq_err = (pred - savings) ^ 2)

mse_train <- mean(predictions_train$sq_err)
mse_train
[1] 96034515
sqrt(mse_train)
[1] 9799.72

The training RMSE (~9,800) is slightly lower than the test RMSE (~10,328), as expected: the model was fitted to minimise error on the training data, so it will usually look a little better there than on unseen data.

Bias - Variance Trade-Off

  • model with high bias (low variance)
    • won't match the data set closely (underfitting, like model_underfit above)
  • model with low bias (high variance)
    • will match the data set very closely (overfitting, like model_overfit above)

K-fold Cross Validation

# create control settings
cv_10_fold <- trainControl(method = "cv",
                           number = 10,
                           savePredictions = TRUE)

model_cv <- train(savings ~ salary + age + retired,
                  data = savings,
                  trControl = cv_10_fold,
                  method = "lm")

model_cv
Linear Regression 

1200 samples
   3 predictor

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 1080, 1080, 1080, 1080, 1080, 1080, ... 
Resampling results:

  RMSE      Rsquared   MAE     
  9896.834  0.3152612  7902.707

Tuning parameter 'intercept' was held constant at a value of TRUE
model_cv$pred
model_cv$resample
mean(model_cv$resample$RMSE)
[1] 9896.834
model_cv_all <- train(savings ~ .,
                      data = savings,
                      trControl = cv_10_fold,
                      method = "lm")

model_cv_all
Linear Regression 

1200 samples
   8 predictor

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 1080, 1080, 1080, 1080, 1080, 1080, ... 
Resampling results:

  RMSE      Rsquared   MAE     
  10194.96  0.2730269  8120.347

Tuning parameter 'intercept' was held constant at a value of TRUE
model_cv_all$resample %>% 
  ggplot(aes(x = Resample,
             y = RMSE)) +
  geom_col()

mean(model_cv_all$resample$Rsquared)
[1] 0.2730269
mean(model_cv_all$resample$RMSE)
[1] 10194.96

Cross-validation agrees with AIC/BIC: the simpler three-predictor model generalises better, with lower average RMSE (9,897 vs 10,195) and higher average \(r^2\) (0.315 vs 0.273).

Test, Training and Validation Sets

  • test: 20%

  • train: 60%

  • validation: 20% (a 60/20/20 split sketch follows this list)

  • fit several models with varying hyperparameters

  • find the best combination of hyperparams for each type of model

  • use the validation set to choose the hyperparams

  • re-train model on entire training set (train + validation)
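
A minimal sketch of the 60/20/20 split, following the same sample()/slice() pattern used earlier (the index arithmetic assumes n_data divides evenly into fifths, as it does here with 1,200 rows):

set.seed(9)

n_data <- nrow(savings)
shuffled <- sample(1:n_data) # shuffle all row indices once

test_index  <- shuffled[1:(n_data * 0.2)]                  # first 20%
valid_index <- shuffled[(n_data * 0.2 + 1):(n_data * 0.4)] # next 20%
train_index <- shuffled[(n_data * 0.4 + 1):n_data]         # remaining 60%

test       <- slice(savings, test_index)
validation <- slice(savings, valid_index)
train      <- slice(savings, train_index)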

Avoiding Leaks

How would you handle NAs????

  • if you impute before you split
    • imputed values in the test set have been influenced by the training set
  • if you impute after you split
    • clean
    • explore (minimally)
    • split
    • training set
      • imputations
      • train model
      • validate
    • test set
      • imputations

We can prevent data leakage by making sure pre-processing is done on the training set separately from the testing/validation set, and by selecting only variables that are useful predictors and would actually be available at the point we want to apply the model.
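
For example, a minimal sketch of split-then-impute for a numeric column with missing values (the salary column and median imputation here are purely illustrative):

# compute the fill value from the TRAINING set only
salary_median <- median(train$salary, na.rm = TRUE)

# apply the same training-derived value to both sets,
# so no information flows from test into training
train <- mutate(train, salary = coalesce(salary, salary_median))
test  <- mutate(test,  salary = coalesce(salary, salary_median))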
